# Load necessary libraries
library(dplyr)
library(openxlsx) 
library(readxl) 
library(umap)
library(tidyverse)
library(caret)
library(ggplot2)
library(plotly)
# Bring the built-in iris data into the session and drop exact duplicate rows
data(iris)
iris_df <- iris %>%
  as.data.frame() %>%
  distinct()

# Feature table for UMAP: every column except the Species label
sampled_var <- select(iris_df, -Species)

# 2D UMAP ----

# Start from the package defaults and override the settings used for the
# two-dimensional embedding.
umap_config <- umap.defaults
umap_config[c("n_neighbors", "min_dist", "n_components")] <- list(
  15,   # n_neighbors: number of neighbors
  0.1,  # min_dist: minimum distance between embedded points
  2     # n_components: embed into two dimensions
)

# Seed R's RNG, then compute the embedding on the numeric columns
set.seed(1234567)
sampled_var.umap <- umap(sampled_var, config = umap_config)

# Embedding coordinates as a data frame with named axes
layout <- setNames(
  as.data.frame(sampled_var.umap$layout),
  c("UMAP_1", "UMAP_2")
)

# Attach the species of each row so points can be coloured by class
layout[["classlabels"]] <- iris_df[["Species"]]

# Scatter plot of the 2D embedding, one fixed colour per species
ggplot(layout, aes(x = UMAP_1, y = UMAP_2, colour = classlabels)) +
  geom_point(size = 3, alpha = 0.7) +
  scale_color_manual(
    values = c("setosa" = "red", "versicolor" = "blue", "virginica" = "green")
  ) +
  labs(
    title = "UMAP Visualization of Iris Dataset",
    x = "UMAP - 1",
    y = "UMAP - 2",
    color = "Species"
  ) +
  theme_classic()

# 3D UMAP ----

# Set UMAP configuration for 3D projection, starting from the package defaults
umap_config <- umap.defaults
umap_config$n_neighbors <- 15  # Number of neighbors (default: 15)
umap_config$min_dist <- 0.1    # Minimum distance between points (default: 0.1)
umap_config$n_components <- 3  # 3D UMAP

# Seed R's RNG, then perform 3D UMAP on the numeric columns
set.seed(12345)
sampled_var_3d_umap <- umap(sampled_var, config = umap_config)

# Convert UMAP output to a data frame with one named column per axis
layout_3d <- as.data.frame(sampled_var_3d_umap$layout)
colnames(layout_3d) <- c("UMAP_1", "UMAP_2", "UMAP_3")

# Add class labels (rows of the embedding correspond to rows of iris_df)
layout_3d$classlabels <- iris_df$Species

# Define color scheme for 3 species
my_colors <- c("setosa" = "red", "versicolor" = "blue", "virginica" = "green")

# Create an interactive 3D UMAP plot.
# The figure title is a top-level layout attribute, so it is passed alongside
# `scene` rather than nested inside it (where it was not rendered). The call is
# namespace-qualified so it cannot be confused with the `layout` data frame
# created in the 2D section or with graphics::layout().
plot_ly(
  data = layout_3d,
  x = ~UMAP_1,
  y = ~UMAP_2,
  z = ~UMAP_3,
  color = ~classlabels,
  colors = my_colors,
  type = "scatter3d",
  mode = "markers"
) %>%
  plotly::layout(
    title = "3D UMAP Visualization of Iris Dataset",
    scene = list(
      xaxis = list(title = "UMAP 1"),
      yaxis = list(title = "UMAP 2"),
      zaxis = list(title = "UMAP 3")
    )
  )